import requests
from bs4 import BeautifulSoup
import json

# Base URL of the kuaidaili free proxy listing ("inha" section); page N is
# fetched as baseUrl + "N/" (page 1 uses the bare base URL).
baseUrl = 'https://www.kuaidaili.com/free/inha/'
# Hard-coded fallback proxy entries, one dict per proxy keyed by protocol
# name ("HTTP" -> "ip:port").
# NOTE(review): this module-level list is never read by the functions below —
# get_proxies() builds and returns its own local list — confirm before removing.
proxies = [
    {
        'HTTP': '114.231.41.71:8888'
    },
    {
        'HTTP': '60.170.204.30:8060'
    },
    {
        'HTTP': '113.121.20.68:9999'
    },
    {
        'HTTP': '61.216.156.222:60808'
    },
    {
        'HTTP': '222.74.73.202:42055'
    },
    {
        'HTTP': '182.34.33.117:9999'
    }
]

# Request headers imitating Chrome 117 on macOS (user-agent, sec-ch-* client
# hints, referer) so the site serves the regular HTML listing page instead of
# blocking the scripted client.
headers = {
    "authority": "www.kuaidaili.com",
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "zh-CN,zh;q=0.9",
    "referer": "https://www.kuaidaili.com/free/inha/",
    "sec-ch-ua": "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"macOS\"",
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "same-origin",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
    "Accept-Encoding": "deflate, gzip"
}


def get_proxies_in_net(page):
    """Fetch one page of the kuaidaili free proxy listing.

    Args:
        page: 1-based page number. Page 1 uses the bare base URL; any other
            page is fetched from ``baseUrl + "<page>/"``.

    Returns:
        The ``requests.Response`` for the GET request.
    """
    if page == 1:
        url = baseUrl
    else:
        # Bug fix: ``page`` arrives as an int (from range() in get_proxies),
        # so ``baseUrl + page`` raised TypeError for every page > 1.
        url = baseUrl + str(page) + '/'
    return requests.get(url, headers=headers)


def get_proxies_by_page(soup):
    """Extract every proxy row from one parsed listing page.

    Args:
        soup: BeautifulSoup document for a kuaidaili free-proxy listing page.

    Returns:
        A list with one dict per ``<tr>`` of the listing table, mapping the
        eight column names (ip, port, hidden, type, district, speed,
        last_time, payWay) to the corresponding cell text.
    """
    columns = ('ip', 'port', 'hidden', 'type',
               'district', 'speed', 'last_time', 'payWay')
    rows = []
    for row in soup.select("table.table>tbody>tr"):
        cells = row.select("td")
        # Indexing (rather than zip) keeps the original behavior for
        # malformed rows with fewer than eight cells: IndexError.
        rows.append({name: cells[idx].text
                     for idx, name in enumerate(columns)})
    return rows


def get_proxies():
    """Scrape free proxies, persist them to JSON, and return them.

    Fetches the configured listing page(s), parses every table row, and turns
    each row into a single-key dict of ``{type: "ip:port"}``. The collected
    list is written to ``proxies_list_file.json`` in the working directory
    and also returned.

    Returns:
        list[dict]: one ``{type: "ip:port"}`` dict per scraped proxy.
    """
    proxy_list = []
    # range(1, 2) scrapes only page 1; widen the range to crawl more pages.
    for current_page in range(1, 2):
        response = get_proxies_in_net(current_page)
        soup = BeautifulSoup(response.text, 'lxml')
        for ori_proxy in get_proxies_by_page(soup):
            proxy_list.append({
                ori_proxy['type']: ori_proxy['ip'] + ':' + ori_proxy['port']
            })
    # ``with`` already closes the file — the original explicit file.close()
    # inside the with-block was redundant. json.dump streams directly instead
    # of building an intermediate string.
    with open('proxies_list_file.json', 'w', encoding='utf-8') as file:
        json.dump(proxy_list, file, sort_keys=True, indent=2)
    return proxy_list


# Guard the scrape behind the script entry point so importing this module
# no longer triggers network requests and a file write as a side effect.
if __name__ == '__main__':
    get_proxies()